# Functions

# Plot color palette
plot_color_palette <- function(input_cols) {
  
  col_data <- tibble(color = input_cols) %>%
    mutate(color =  fct_inorder(color))
  
  res <- col_data %>%
    ggplot(aes(x = "color", fill = color)) +
    geom_bar() +
    scale_fill_manual(values = input_cols) +
    theme_void()
  
  res
}

# Create color palette
create_gradient <- function(cols_in, n = NULL) {
  
  if (is.null(n)) {
    n <- length(cols_in)
  }
  
  colorRampPalette(cols_in)(n)
}

create_col_fun <- function(cols_in) {
  
  function(n = NULL) {
    create_gradient(cols_in, n)
  }
}

# Pull items from list of vectors
pull_nest_vec <- function(list_in, idx) {
  res <- map_chr(list_in, ~ .x[[idx]])
  
  res
}

# Capitalize first character without modifying other characters
str_to_title_v2 <- function(string) {
  
  cap_first_chr <- function(string) {
    chrs <- string %>%
      str_split(pattern = "") %>%
      unlist()
    
    if (any(chrs %in% LETTERS)) {
      return(string)
    }
    
    chrs[1] <- str_to_upper(chrs[1])
    
    res <- chrs %>%
      reduce(str_c)
    
    res
  }
  
  res <- string %>%
    map_chr(~ {
      .x %>%
        str_split(pattern = " ") %>%
        unlist() %>%
        map_chr(cap_first_chr) %>%
        reduce(str_c, sep = " ")
    })
  
  res
}

# Set colors
set_cols <- function(types_in, cols_in, other_cols) {
  
  types_in <- types_in[!types_in %in% names(other_cols)]
  cols_in <- cols_in[!cols_in %in% other_cols]
  
  names(cols_in) <- types_in
  cols_in <- cols_in[!is.na(names(cols_in))]
  
  res <- c(cols_in, other_cols)
  
  res
}

# Set subtype colors
set_type_cols <- function(type_in, sobjs_in, type_key, type_column = "subtype",
                          cols_in, other_cols) {
  
  sobjs_in <- sobjs_in[type_key[names(sobjs_in)] == type_in]
  
  res <- sobjs_in %>%
    map(~ {
      .x@meta.data %>%
        pull(type_column) %>%
        unique()
    }) %>%
    reduce(c) %>%
    unique() %>%
    set_cols(
      cols_in = cols_in,
      other_cols = other_cols
    )
  
  res
}

# Subset Seurat objects for plotting
subset_sobj <- function(sobj_in, name, type_vec, include_list, type_column = "cell_type1", subtype_column = "cell_type2",
                        clust_column = "cell_type3", data_column = "ova_fc", min_cells = 15, cap_names = T,
                        orig_idents = c("_1" = "CD45_neg", "_2" = "CD45_pos"),
                        type_filt = c("DC" = "CD45_pos", "LEC" = "CD45_neg", "fibroblast" = "CD45_neg"), ...) {
  
  res <- sobj_in
  
  # Pull info for cell_type from reference lists
  type_in <- type_vec[[name]]
  include_types <- include_list[[name]]
  
  # Set cell type, subtype, and cluster columns in meta.data
  res@meta.data <- res@meta.data %>%
    rownames_to_column("cell_id") %>%
    mutate(
      cell_type       = !!sym(type_column),
      subtype         = !!sym(subtype_column),
      subtype_cluster = !!sym(clust_column)
    ) %>%
    column_to_rownames("cell_id")
  
  # Set orig.ident based on cell_id suffix
  if (!is.null(orig_idents)) {
    res@meta.data <- res@meta.data %>%
      rownames_to_column("cell_id") %>%
      mutate(
        orig.ident = str_extract(cell_id, "_[0-9]+$"),
        orig.ident = orig_idents[orig.ident]
      ) %>%
      column_to_rownames("cell_id")
  }
  
  # Filter cells based on orig.ident
  if (!is.null(type_filt)) {
    res <- res %>%
      subset(subset = orig.ident == type_filt[type_in])
  }
  
  # Filter cells based on input cell type
  res <- res %>%
    subset(subset = cell_type %in% c(type_in, include_types))
  
  if (!type_in %in% pull(res@meta.data, cell_type)) {
    stop(str_c("ERROR: Cell type not present after filtering for ", type_filt[type_in]))
  }
  
  # Filter cells based on number of cells per subtype
  if (!is.null(min_cells)) {
    keep_cells <- res@meta.data %>%
      rownames_to_column("cell_id") %>%
      group_by(subtype) %>%
      filter(n_distinct(cell_id) > min_cells) %>%
      pull(cell_id)
    
    res <- res %>%
      subset(cells = keep_cells)
  }
  
  # Score cell cycle genes
  s.genes <- cc.genes$s.genes %>%
    str_to_title()
  
  g2m.genes <- cc.genes$g2m.genes %>%
    str_to_title()
  
  res <- res %>%
    CellCycleScoring(
      s.features   = s.genes, 
      g2m.features = g2m.genes
    )
  
  # Re-process object
  res <- res %>%
    FindVariableFeatures(selection.method = "vst") %>%
    ScaleData(vars.to.regress = c("S.Score", "G2M.Score")) %>%
    RunPCA() %>%
    RunUMAP(dims = 1:40) %>%
    AddMetaData(FetchData(., c("PC_1", "PC_2"))) %>%
    AddMetaData(FetchData(., c("UMAP_1", "UMAP_2")))
  
  # Add pseudo count
  if (0 %in% pull(res@meta.data, !!sym(data_column))) {
    res@meta.data <- res@meta.data %>%
      rownames_to_column("cell_id") %>%
      mutate(
        pseudo = ifelse(!!sym(data_column) > 0, !!sym(data_column), NA),
        pseudo = min(pseudo, na.rm = T) * 0.5,
        !!sym(data_column) := !!sym(data_column) + pseudo
      ) %>%
      column_to_rownames("cell_id")
  }

  # Capitalize subtypes
  res@meta.data <- res@meta.data %>%
    rownames_to_column("cell_id") %>%
    mutate(
      subtype = str_to_title_v2(subtype),
      subtype_cluster = str_to_title_v2(subtype_cluster)
    ) %>%
    column_to_rownames("cell_id")
  
  res
}

# Re-cluster and run clustify
run_clustifyr <- function(sobj_in, name, type_vec, resolution = 1.8, type_column = "cell_type",
                          subtype_column = "subtype", clust_column = "subtype_cluster") {
  
  res <- sobj_in
  
  type_in <- type_vec[[name]]
  
  ref <- params$sobj_refs[[type_in]] %>%
    here()
  
  if (is_empty(ref)) {
    return(res)
  }
  
  load(ref)
  
  type_column    <- sym(type_column)
  subtype_column <- sym(subtype_column)
  clust_column   <- sym(clust_column)
  
  ref <- ref %>%
    basename() %>% 
    str_remove("\\.rda$")
  
  res <- sobj_in %>%
    FindNeighbors(
      reduction = "pca",
      dims = 1:40
    ) %>%
    FindClusters(
      resolution = resolution,
      verbose = F
    ) %>%
    clustify(
      ref_mat = eval(parse(text = ref)),
      cluster_col = "seurat_clusters"
    )
  
  res@meta.data <- res@meta.data %>%
    rownames_to_column("cell_id") %>%
    mutate(
      !!subtype_column := if_else(!!type_column == type_in, type, !!type_column),
      !!subtype_column := str_to_title_v2(!!subtype_column),
      !!clust_column   := if_else(!!type_column == type_in, str_c(seurat_clusters, "-", !!subtype_column), !!subtype_column)
    ) %>%
    rename(
      UMAP_1 = UMAP_1.x,
      UMAP_2 = UMAP_2.x
    ) %>%
    column_to_rownames("cell_id")
  
  res
}

# Filter list of Seurat objects for patient, normalize and merge objects 
merge_sobj <- function(sobj_list, sample_order = NULL) {

  res <- merge(
    x = sobj_list[[1]],
    y = sobj_list[2:length(sobj_list)],
    add.cell.ids = names(sobj_list)
  ) %>%
    ScaleData(assay = "RNA") %>%
    ScaleData(assay = "adt") %>%
    FindVariableFeatures(assay = "RNA")
  
  # Set sample order
  res@meta.data <- res@meta.data %>%
    rownames_to_column("cell_ids") %>%
    mutate(orig.ident = fct_relevel(orig.ident, sample_order)) %>%
    column_to_rownames("cell_ids")
  
  res
}

# Fit gaussian mixture model for given signal
fit_GMM <- function(sobj_in, data_column = "adt_ovalbumin", data_slot = "counts", prob = 0.5, quiet = F) {
  
  set.seed(42)
  
  # Fit GMM for OVA signal
  data_df <- sobj_in %>%
    FetchData(data_column, slot = data_slot) %>%
    rownames_to_column("cell_id") %>%
    mutate(!!sym(data_column) := !!sym(data_column) + 1) %>%
    column_to_rownames("cell_id")
  
  quiet_EM <- quietly(~ normalmixEM(.))
  
  if (!quiet) {
    quiet_EM <- normalmixEM
  }
  
  mixmdl <- data_df %>%
    pull(data_column) %>%
    quiet_EM()
  
  if (quiet) {
    mixmdl <- mixmdl$result
  }
  
  # New column names
  ova_names <- c("Low", "High")
  comp_names <- c("comp.1", "comp.2")
  
  if (mixmdl$mu[1] > mixmdl$mu[2]) {
    ova_names <- rev(ova_names)
  }
  
  names(comp_names)    <- ova_names
  names(mixmdl$mu)     <- ova_names
  names(mixmdl$sigma)  <- ova_names
  names(mixmdl$lambda) <- ova_names

  # Divide into OVA groups
  res <- data.frame(
    cell_id = rownames(data_df),
    data = data_df[, data_column],
    mixmdl$posterior
  ) %>%
    dplyr::rename(!!sym(data_column) := data) %>%
    rename(all_of(comp_names)) %>%
    mutate(GMM_grp = if_else(High >= prob, "High", "Low")) %>%
    column_to_rownames("cell_id")
  
  res <- list(
    res    = res,
    mu     = mixmdl$mu,
    sigma  = mixmdl$sigma,
    lambda = mixmdl$lambda
  )
  
  res
}

# Classify cells based on OVA signal
classify_ova <- function(sobj_in, filt_column = "cell_type", filt = NULL, data_column = "adt_ovalbumin", data_slot = "counts", ...) {
  
  # Filter Seurat object
  sobj_filt <- sobj_in
  
  if (!is.null(filt)) {
    sobj_filt <- sobj_filt %>%
      subset(!!sym(filt_column) == filt)
  }
  
  # Fit GMM
  gmm_res <- sobj_filt %>%
    fit_GMM(
      data_column = data_column,
      data_slot   = data_slot,
      ...
    )
  
  gmm_df <- gmm_res$res %>%
    rownames_to_column("cell_id") %>%
    mutate(
      mu = gmm_res$mu[GMM_grp],
      GMM_grp = str_to_lower(GMM_grp),
      GMM_grp = str_c("ova ", GMM_grp),
    ) %>%
    column_to_rownames("cell_id")
  
  # Add OVA groups to meta.data
  res <- sobj_in %>%
    AddMetaData(gmm_df)
  
  res@meta.data <- res@meta.data %>%
    rownames_to_column("cell_id") %>%
    mutate(GMM_grp = if_else(is.na(GMM_grp), "Other", GMM_grp)) %>%
    column_to_rownames("cell_id")
  
  res
}

# Add distribution of GMM component to plot
add_stat_fun <- function(gmm_in, cols_in, key) {
  # dnorm provides density for the normal distribution given the mean and
  # standard deviation. Lambda is used to adjust for the mixture composition.
  # mu: Mean of component
  # sig: Standard deviation of component
  # lam: Lambda of component (mixture weight)
  plot_mix_comps <- function(x, mu, sigma, lam) {
    lam * dnorm(x, mu, sigma)
  }

  stat_function(
    geom  = "line",
    fun   = plot_mix_comps,
    args  = list(gmm_in$mu[key], gmm_in$sigma[key], gmm_in$lambda[key]),
    color = cols_in[key],
    lwd   = 1
  )
}

# Overlay feature data on UMAP or tSNE
# Cannot change number of columns when using FeaturePlot with split.by
plot_features <- function(sobj_in, x = "UMAP_1", y = "UMAP_2", feature, data_slot = "data", 
                          split_id = NULL, pt_size = 0.25, pt_outline = NULL, plot_cols = NULL,
                          feat_levels = NULL, split_levels = NULL, min_pct = NULL, max_pct = NULL, 
                          calc_cor = F, lm_line = F, lab_size = 3.7, lab_pos = c(0.8, 0.9), ...) {
  
  # Format imput data
  counts <- sobj_in
  
  if ("Seurat" %in% class(sobj_in)) {
    vars <- c(x, y, feature)
    
    if (!is.null(split_id)) {
      vars <- c(vars, split_id)
    }

    counts <- sobj_in %>%
      FetchData(vars = unique(vars), slot = data_slot) %>%
      as_tibble(rownames = "cell_ids")
  }
  
  # Rename features
  if (!is.null(names(feature))) {
    counts <- counts %>%
      rename(!!!syms(feature))
    
    feature <- names(feature)
  }
  
  if (!is.null(names(x))) {
    counts <- counts %>%
      rename(!!!syms(x))
    
    x <- names(x)
  }
  
  if (!is.null(names(y))) {
    counts <- counts %>%
      rename(!!!syms(y))
    
    y <- names(y)
  }
  
  # Set min and max values for feature
  if (!is.null(min_pct) || !is.null(max_pct)) {
    counts <- counts %>%
      mutate(
        pct_rank = percent_rank(!!sym(feature)),
        max_val  = ifelse(pct_rank > max_pct, !!sym(feature), NA),
        max_val  = min(max_val, na.rm = T),
        min_val  = ifelse(pct_rank < min_pct, !!sym(feature), NA),
        min_val  = max(min_val, na.rm = T),
        !!sym(feature) := if_else(!!sym(feature) > max_val, max_val, !!sym(feature)),
        !!sym(feature) := if_else(!!sym(feature) < min_val, min_val, !!sym(feature))
      )
  }

  # Set feature order
  if (!is.null(feat_levels)) {
    counts <- counts %>%
      mutate(!!sym(feature) := fct_relevel(!!sym(feature), feat_levels))
  }
  
  # Set facet order
  if (!is.null(split_id) && length(split_id) == 1) {
    counts <- counts %>%
      mutate(split_id = !!sym(split_id))
    
    if (!is.null(split_levels)) {
      counts <- counts %>%
        mutate(split_id = fct_relevel(split_id, split_levels))
    }
  }
  
  # Calculate correlation
  if (calc_cor) {
    if (!is.null(split_id)) {
      counts <- counts %>%
        group_by(split_id)
    }
    
    counts <- counts %>%
      mutate(
        cor_lab = cor(!!sym(x), !!sym(y)),
        cor_lab = round(cor_lab, digits = 2),
        cor_lab = str_c("r = ", cor_lab),
        min_x   = min(!!sym(x)),
        max_x   = max(!!sym(x)),
        min_y   = min(!!sym(y)),
        max_y   = max(!!sym(y)),
        lab_x   = (max_x - min_x) * lab_pos[1] + min_x,
        lab_y   = (max_y - min_y) * lab_pos[1] + min_y
      )
  }
  
  # Create scatter plot
  # To add outline for each cluster create separate layers
  res <- counts %>%
    arrange(!!sym(feature))
  
  if (!is.null(pt_outline)) {
    
    if (!is.numeric(counts[[feature]])) {
      res <- res %>%
        ggplot(aes(!!sym(x), !!sym(y), color = !!sym(feature), fill = !!sym(feature)))
      
      feats <- counts[[feature]] %>%
        unique()
      
      if (!is.null(feat_levels)) {
        feats <- feat_levels[feat_levels %in% feats]
      }
      
      for (feat in feats) {
        f_counts <- counts %>%
          filter(!!sym(feature) == feat)
        
        res <- res +
          geom_point(data = f_counts, aes(fill = !!sym(feature)), size = pt_outline, color = "black", show.legend = F) +
          geom_point(data = f_counts, size = pt_size)
      }
      
    } else {
      res <- res %>%
        ggplot(aes(!!sym(x), !!sym(y), color = !!sym(feature))) +
        geom_point(aes(fill = !!sym(feature)), size = pt_outline, color = "black", show.legend = F) +
        geom_point(size = pt_size)
    }
    
  } else {
    res <- res %>%
      ggplot(aes(!!sym(x), !!sym(y), color = !!sym(feature))) +
      geom_point(size = pt_size)
  }

  # Add regression line
  if (lm_line) {
    res <- res +
      geom_smooth(method = "lm", se = F, color = "black", size = 0.5, linetype = 2)
  }
  
  # Add correlation coefficient label
  if (calc_cor) {
    res <- res +
      geom_text(
        aes(x = lab_x, lab_y, label = cor_lab),
        color = "black",
        size  = lab_size,
        check_overlap = T, 
        show.legend = F
      )
  }
  
  # Set feature colors
  if (!is.null(plot_cols)) {
    if (is.numeric(counts[[feature]])) {
      res <- res +
        scale_color_gradientn(colors = plot_cols)
        # scale_color_gradient(low = plot_cols[1], high = plot_cols[2])

    } else {
      res <- res +
        scale_color_manual(values = plot_cols) +
        scale_fill_manual(values = plot_cols)
    }
  }
  
  # Split plot into facets
  if (!is.null(split_id)) {
    if (length(split_id) == 1) {
      res <- res +
        facet_wrap(~ split_id, ...)
      
    } else if (length(split_id) == 2) {
      eq <- str_c(split_id[1], " ~ ", split_id[2])
      
      res <- res +
        facet_grid(as.formula(eq), ...)
    }
  }
  
  res
}

# Run gprofiler
run_gprofiler <- function(gene_list, genome = NULL, gmt_id = NULL, p_max = params$p_max_GO, GO_size = params$term_size,
                          intrsct_size = params$intrsct_size, order_query = params$order_query, dbases = c("GO:BP", "GO:MF", "KEGG"), ...) {
  
  # Check for empty gene list
  if (is_empty(gene_list)) {
    return(as_tibble(NULL))
  }
  
  # Check arguments
  if (is.null(genome) && is.null(gmt_id)) {
    stop("ERROR: Must specifiy genome or gmt_id")
  }
  
  # Use gmt id
  if (!is.null(gmt_id)) {
    genome <- gmt_id
    dbases <- NULL
  }
  
  # Run gProfileR
  res <- gene_list %>%
    gost(
      organism       = genome,
      sources        = dbases,
      domain_scope   = "annotated",
      significant    = T,
      user_threshold = p_max,
      ordered_query  = order_query,
      ...
    )
  
  # Format and sort output data.frame
  res <- as_tibble(res$result)
  
  if (!is_empty(res)) {
    res <- res %>%
      filter(
        term_size > GO_size,
        intersection_size > intrsct_size
      ) %>%
      arrange(source, p_value)
  }
  
  res
}

# Create GO bubble plot
create_bubbles <- function(GO_df, plot_colors = theme_cols[c(1:2, 4, 9)],
                           n_terms = 15) {

  # Check for empty inputs
  if (is_empty(GO_df) || nrow(GO_df) == 0) {
    res <- ggplot() +
      geom_blank()
    
    return(res)
  }
  
  # Shorten GO terms and database names
  GO_data <- GO_df %>%
    mutate(
      term_id = str_remove(term_id, "(GO|KEGG):"),
      term_id = str_c(term_id, " ", term_name),
      term_id = str_to_lower(term_id),
      term_id = str_trunc(term_id, 40, "right"),
      source  = fct_recode(
        source,
        "Biological\nProcess" = "GO:BP",
        "Cellular\nComponent" = "GO:CC",
        "Molecular\nFunction" = "GO:MF",
        "KEGG"                = "KEGG"
      )
    )
  
  # Reorder database names
  plot_levels <- c(
    "Biological\nProcess",
    "Cellular\nComponent",
    "Molecular\nFunction",
    "KEGG"
  )
  
  GO_data <- GO_data %>%
    mutate(source = fct_relevel(source, plot_levels))
  
  # Extract top terms for each database
  top_GO <- GO_data %>%
    group_by(source) %>%
    arrange(p_value) %>%
    dplyr::slice(1:n_terms) %>%
    ungroup()
  
  # Create bubble plots
  res <- GO_data %>%
    ggplot(aes(1.25, -log10(p_value), size = intersection_size)) +
    geom_point(color = plot_colors, alpha = 0.5, show.legend = T) +
    geom_text_repel(
      aes(2, -log10(p_value), label = term_id),
      data         = top_GO,
      size         = 2.3,
      direction    = "y",
      hjust        = 0,
      segment.size = NA
    ) +
    xlim(1, 8) +
    labs(y = "-log10(p-value)") +
    theme_info +
    theme(
      axis.title.x    = element_blank(),
      axis.text.x     = element_blank(),
      axis.ticks.x    = element_blank()
    ) +
    facet_wrap(~ source, scales = "free", nrow = 1)
  
  res
}

# Plot percentage of cells in given groups
plot_cell_count <- function(sobj_in, group_id, split_id = NULL, group_order = NULL, fill_id, 
                            plot_colors = theme_cols, x_lab = "Cell type", y_lab = "Fraction of cells",
                            bar_pos = "fill", order_count = T, bar_line = 0, ...) {
  
  res <- sobj_in@meta.data %>%
    rownames_to_column("cell_ids") %>%
    mutate(
      group_id := !!sym(group_id),
      fill_id  := !!sym(fill_id)
    )
  
  if (!is.null(group_order)) {
    res <- res %>%
      mutate(group_id = fct_relevel(group_id, group_order))
  }
  
  if (!is.null(split_id)) {
    res <- res %>%
      mutate(split_id := !!sym(split_id))
  }
  
  if (order_count) {
    res <- res %>%
      mutate(fill_id = fct_reorder(fill_id, cell_ids, n_distinct))
  }

  res <- res %>%
    ggplot(aes(group_id, fill = fill_id)) +
    geom_bar(position = bar_pos, size = bar_line, color = "black") +
    scale_fill_manual(values = plot_colors) +
    labs(x = x_lab, y = y_lab) +
    theme_info

  if (!is.null(split_id)) {
    res <- res +
      facet_wrap(~ split_id, ...)
  }
  
  res
}

# Find markers with presto
find_markers <- function(input_sobj, group_column = NULL, uniq = params$uniq_markers, 
                         FC_min = params$FC_min, auc_min = params$auc_min, p_max = params$p_max_markers, 
                         pct_in_min = params$pct_in_min, pct_out_max = params$pct_out_max, ...) {
  
  res <- input_sobj %>%
    wilcoxauc(group_by = group_column) %>%
    as_tibble() %>%
    filter(
      padj    < p_max,
      logFC   > FC_min,
      auc     > auc_min,
      pct_in  > pct_in_min,
      pct_out < pct_out_max
    ) %>%
    arrange(desc(logFC))
  
  if (uniq) {
    res <- res %>%
      group_by(feature) %>%
      filter(n() == 1) %>%
      ungroup()
  }
  
  res
}

# Find cluster markers for each separate cell type
find_group_markers <- function(input_grp, input_sobj, grp_column, clust_column) {
  
  res <- input_sobj %>%
    subset(!!sym(grp_column) == input_grp)

  clusts <- res@meta.data %>%
    pull(clust_column)

  if (n_distinct(clusts) < 2) {
    return(NULL)
  }

  res <- res %>%
    find_markers(group_column = clust_column) %>%
    mutate(cell_type = input_grp)

  res
}

# Create reference UMAP for comparisons
create_ref_umap <- function(input_sobj, pt_mtplyr = 1, pt_outline = NULL, color_guide, ...) {
  
  if (is.null(pt_outline)) {
    pt_outline <- 0.1 * pt_mtplyr + 0.3
  }
  
  res <- input_sobj %>%
    plot_features(
      pt_size     = 0.1 * pt_mtplyr,
      pt_outline  = pt_outline,
      ...
    ) +
    guides(color = color_guide) +
    blank_theme +
    theme(
      legend.position = "top",
      legend.title    = element_blank(),
      legend.text     = element_text(size = 10)
    )
  
  res
}

# Create UMAPs showing marker gene signal
create_marker_umaps <- function(input_sobj, input_markers, umap_col = NULL, add_outline = NULL, 
                                pt_mtplyr = 1, low_col = "#fafafa") {
  
  # pt_size <- 0.25 * pt_mtplyr
  pt_size <- 0.1 * pt_mtplyr
  
  if (!is.null(umap_col)) {
    input_markers <- set_names(
      x  = rep(umap_col, length(input_markers)),
      nm = input_markers
    )
  }
  
  res <- input_markers %>%
    imap(~ {
      input_sobj %>%
        plot_features(
          feature    = .y,
          plot_cols  = c(low_col, .x),
          pt_outline = add_outline,
          pt_size    = pt_size
        ) +
        ggtitle(.y) +
        blank_theme +
        theme(
          plot.title        = element_text(size = 13),
          legend.position   = "bottom",
          legend.title      = element_blank(),
          legend.text       = element_text(size = 8),
          legend.key.height = unit(0.1, "cm"),
          legend.key.width  = unit(0.3, "cm"),
          axis.title.y      = element_text(size = 13, color = "white"),
          axis.text.y       = element_text(size = 8, color = "white")
        )
    })
  
  res
}

# Create boxplots showing marker gene signal
create_marker_boxes <- function(input_sobj, input_markers, clust_column, box_cols,
                                group = NULL, include_legend = F, all_boxes = F,
                                all_violins = F, order_boxes = T, n_boxes = 10,
                                n_rows = 2, pt_mtplyr = 1, exclude_clust = NULL, ...) {
  
  # Retrieve and format data for boxplots
  input_markers <- input_markers %>%
    head(n_boxes)
  
  box_data <- input_sobj %>%
    FetchData(c(clust_column, input_markers)) %>%
    as_tibble(rownames = "cell_id") %>%
    filter(!(!!sym(clust_column) %in% exclude_clust)) %>%
    mutate(grp = str_remove(!!sym(clust_column), "^[a-zA-Z0-9_]+-"))  # This is specific for Shannon's subtype clusters
  
  input_markers <- input_markers %>%
    str_trunc(9)
  
  # Filter based on input group
  if (!is.null(group)) {
    box_data <- box_data %>%
      filter(grp == group)
  }
  
  # Format data for plots
  box_data <- box_data %>%
    pivot_longer(cols = c(-cell_id, -grp, -!!sym(clust_column)), names_to = "key", values_to = "Counts") %>%
    mutate(
      !!sym(clust_column) := fct_relevel(!!sym(clust_column), names(box_cols)),
      key = str_trunc(key, width = 9, side = "right"),
      key = fct_relevel(key, input_markers)
    )
  
  # Order boxes by mean signal
  if (order_boxes) {
    box_data <- box_data %>%
      mutate(!!sym(clust_column) := fct_reorder(!!sym(clust_column), Counts, mean, .desc = T))
  }
  
  n_clust <- box_data %>%
    pull(clust_column) %>%
    n_distinct()
  
  # Create plots
  n_cols <- ceiling(n_boxes / n_rows)
  
  res <- box_data %>%
    ggplot(aes(!!sym(clust_column), Counts, color = !!sym(clust_column))) + 
    facet_wrap(~ key, ncol = n_cols) +
    scale_color_manual(values = box_cols) +
    theme_info +
    theme(
      panel.spacing.x  = unit(0.7, "cm"),
      strip.background = element_blank(),
      strip.text       = element_text(size = 13),
      legend.position  = "none",
      axis.title.x     = element_blank(),
      axis.title.y     = element_text(size = 13),
      axis.text.x      = element_blank(),
      axis.text.y      = element_text(size = 8),
      axis.ticks.x     = element_blank(),
      axis.line.x      = element_blank()
    )
  
  # Adjust output plot type
  if (n_clust > 6 || all_boxes) {
    res <- res +
      geom_boxplot(aes(fill = !!sym(clust_column), color = !!sym(clust_column)), size = 0, outlier.color = "#f0f0f0", outlier.size = 0.25) +
      stat_summary(fun = "median", geom = "point", shape = 22, size = 1, fill = "white") +
      scale_fill_manual(values = box_cols) +
      theme(...)
    
  } else if (all_violins) {
    res <- res +
      geom_violin(aes(fill = !!sym(clust_column)), size = 0.2) +
      stat_summary(fun = "median", geom = "point", shape = 22, size = 1, fill = "white") +
      scale_fill_manual(values = box_cols) +
      scale_color_manual(values = box_cols) +
      theme(...)
    
  } else {
    pt_size <- 0.3 * pt_mtplyr
    
    res <- res +
      geom_quasirandom(size = pt_size) +
      theme(...)
  }
  
  # Add legend
  if (include_legend) {
    res <- res +
      guides(color = col_guide) +
      theme(legend.position = "top")
  }
  
  # Add blank space for missing facets
  n_keys <- n_distinct(box_data$key)
  
  if (n_keys <= n_cols && n_rows > 1) {
    n_keys <- if_else(n_keys == 1, 2, as.double(n_keys))
    n_cols <- floor(n_cols / n_keys)
    
    res <- res %>%
      plot_grid(
        ncol = n_cols,
        nrow = 2
      )
  }
  
  res
}

# Create figure summarizing marker genes
create_marker_fig <- function(input_sobj, input_markers, input_GO, clust_column, 
                              input_umap, umap_color, fig_heights = c(0.46, 0.3, 0.3), 
                              GO_genome = params$genome, box_colors, n_boxes = 10,
                              umap_outline = NULL, umap_mtplyr = 1, xlsx_name = NULL, 
                              sheet_name = NULL, ...) {
  
  marks_umap <- marks_boxes <- GO_bubbles <- ggplot() +
    geom_blank() +
    theme_void()
  
  # Create UMAPs showing marker gene signal
  if (nrow(input_markers) > 0) {
    top_marks <- input_markers$feature %>%
      head(n_boxes)
    
    clust_legend <- get_legend(input_umap)
    
    input_umap <- input_umap +
      theme(legend.position = "none")
    
    marks_umap <- input_sobj %>%
      create_marker_umaps(
        input_markers = head(top_marks, 7),
        umap_col      = umap_color,
        add_outline   = umap_outline,
        pt_mtplyr     = umap_mtplyr
      ) %>%
      append(list(input_umap), .)
    
    marks_umap <- plot_grid(
      plotlist = marks_umap,
      ncol     = 4,
      nrow     = 2,
      align    = "vh",
      axis     = "trbl"
    )
    
    marks_umap <- plot_grid(
      clust_legend, marks_umap,
      rel_heights = c(0.2, 0.9),
      nrow = 2
    )
    
    # Create boxplots showing marker gene signal
    marks_boxes <- input_sobj %>%
      create_marker_boxes(
        input_markers = top_marks,
        clust_column  = clust_column,
        box_cols      = box_colors,
        n_boxes       = n_boxes,
        plot.margin   = unit(c(0.8, 0.2, 0.2, 0.2), "cm"),
        ...
      )
    
    # Create GO term plots
    if (nrow(input_GO) > 0) {
      GO_bubbles <- input_GO %>%
        create_bubbles(plot_colors = umap_color) +
        theme(
          plot.margin      = unit(c(0.8, 0.2, 0.2, 0.2), "cm"),
          strip.background = element_blank(),
          strip.text       = element_text(size = 13),
          axis.title.y     = element_text(size = 13),
          axis.text.y      = element_text(size = 8),
          axis.line.x      = element_blank(),
          legend.position  = "bottom",
          legend.title     = element_blank(),
          legend.text      = element_text(size = 8)
        )
      
      # Write GO terms to excel file 
      if (!is.null(xlsx_name)) {
        input_GO %>%
          dplyr::select(
            term_name,  term_id,
            source,     effective_domain_size,
            query_size, intersection_size,
            p_value,    significant 
          ) %>%
          arrange(source, p_value) %>%
          write.xlsx(
            file      = str_c(xlsx_name, "_GO.xlsx"),
            sheetName = sheet_name,
            append    = T
          )
      }
    }
    
    # Write markers to excel file
    if (!is.null(xlsx_name)) {
      input_markers %>%
        write.xlsx(
          file      = str_c(xlsx_name, "_markers.xlsx"),
          sheetName = sheet_name,
          append    = T
        )
    }
  }
  
  # Create final figure
  res <- plot_grid(
    marks_umap, marks_boxes, GO_bubbles,
    rel_heights = fig_heights,
    ncol        = 1
  )
  
  res
}

# Filter clusters and set cluster order
set_cluster_order <- function(input_cols, input_marks, n_cutoff = 5) {
  input_marks <- input_marks %>%
    group_by(group) %>%
    filter(n() >= n_cutoff) %>%
    ungroup()
  
  marks <- unique(input_marks$group)
  res   <- names(input_cols)
  res   <- res[res %in% marks]
  
  res
}

# Create v1 panel for marker genes
create_marker_panel_v1 <- function(input_sobj, input_cols, input_umap = NULL, clust_column, order_boxes = T,
                                   color_guide = guide_legend(override.aes = list(size = 3.5, shape = 16)),
                                   uniq = params$uniq_GO, umap_mtplyr = 6, xlsx_name = NULL, exclude_clust = NULL, 
                                   ...) {
  
  # Set point size
  # ref_mtplyr <- if_else(umap_mtplyr == 1, umap_mtplyr, umap_mtplyr * 2.5)
  umap_mtplyr <- if_else(ncol(input_sobj) < 500, umap_mtplyr, 1)
  ref_mtplyr <- umap_mtplyr
  
  # Find marker genes
  markers <- input_sobj %>%
    find_markers(group_column = clust_column)
  
  # Find GO terms
  GO_df <- markers
  
  if (nrow(markers) > 0) {
    GO_df <- markers %>%
      group_by(group) %>%
      do({
        arrange(., desc(logFC)) %>%
          pull(feature) %>%
          run_gprofiler(genome = params$genome)
      }) %>%
      ungroup()
  }
  
  if (uniq && nrow(GO_df) > 0) {
    GO_df <- GO_df %>%
      group_by(term_id) %>%
      filter(n() == 1) %>%
      ungroup()
  }
  
  # Set cluster order based on order of input_cols
  fig_clusters <- input_cols %>%
    set_cluster_order(markers)
  
  fig_clusters <- fig_clusters[!fig_clusters %in% exclude_clust]
  
  # Create figures
  for (i in seq_along(fig_clusters)) {
    cat("\n#### ", fig_clusters[i], "\n", sep = "")
    
    # Filter markers and GO terms
    clust <- fig_clusters[i]
    
    fig_marks <- markers %>%
      filter(group == clust)
    
    fig_GO <- GO_df %>%
      filter(group == clust)
    
    # Create reference umap
    ref_umap <- input_umap
    umap_col <- input_cols[clust]
    
    if (is.null(input_umap)) {
      umap_levels <- input_cols[names(input_cols) != clust]
      umap_levels <- names(c(umap_levels, umap_col))
      
      ref_umap <- input_sobj %>%
        create_ref_umap(
          feature     = clust_column,
          plot_cols   = input_cols,
          feat_levels = umap_levels,
          pt_mtplyr   = ref_mtplyr,
          color_guide = color_guide
        )
    }
    
    # Create panel
    marker_fig <- input_sobj %>%
      create_marker_fig(
        input_markers = fig_marks,
        input_GO      = fig_GO,
        clust_column  = clust_column,
        input_umap    = ref_umap,
        umap_color    = umap_col,
        box_colors    = input_cols,
        order_boxes   = order_boxes,
        umap_mtplyr   = umap_mtplyr,
        xlsx_name     = xlsx_name,
        sheet_name    = clust,
        exclude_clust = exclude_clust,
        ...
      )
    
    cat(nrow(fig_marks), "marker genes and", nrow(fig_GO), "GO terms were identified.")
    print(marker_fig)
    cat("\n\n---\n\n<br>\n\n<br>\n\n")
  }
}

# Create v2 panel that splits plots into groups
create_marker_panel_v2 <- function(input_sobj, input_markers, input_cols, grp_column, clust_column,
                                   color_guide = guide_legend(override.aes = list(size = 3.5, shape = 16)), 
                                   uniq_GO = params$uniq_GO, umap_mtplyr = 6, xlsx_name = NULL, clust_regex = "^[a-zA-Z0-9_]+-",
                                   ...) {
  
  # Set point size
  # ref_mtplyr <- if_else(ncol(input_sobj) < 500, umap_mtplyr * 2.5, 1)
  umap_mtplyr <- if_else(ncol(input_sobj) < 500, umap_mtplyr, 1)
  ref_mtplyr <- umap_mtplyr
  
  # Figure colors and order
  fig_clusters <- input_cols %>%
    set_cluster_order(input_markers)
  
  # Find GO terms
  GO_df <- input_markers %>%
    group_by(group) %>%
    do({
      arrange(., desc(logFC)) %>%
        pull(feature) %>%
        run_gprofiler(genome = params$genome)
    }) %>%
    ungroup()
  
  if (uniq_GO && nrow(GO_df) > 0) {
    GO_df <- GO_df %>%
      group_by(term_id) %>%
      filter(n() == 1) %>%
      ungroup()
  }
  
  # Create figures
  for (i in seq_along(fig_clusters)) {
    cat("\n#### ", fig_clusters[i], "\n", sep = "")
    
    # Filter markers and GO terms
    clust <- fig_clusters[i]
    
    fig_marks <- input_markers %>%
      filter(group == clust)
    
    fig_GO <- GO_df %>%
      filter(group == clust)
    
    # Set colors
    umap_col <- input_cols[clust]
    
    group <- clust %>%
      str_remove(clust_regex)
    
    grp_regex <- str_c("-", group, "$") %>%
      str_replace("\\+", "\\\\+")            # include this to escape "+" in names
    
    fig_cols <- input_cols[grepl(grp_regex, names(input_cols))]
    fig_cols <- c( "Other" = "#fafafa", fig_cols)
    ref_cols <- fig_cols[names(fig_cols) != clust]
    ref_cols <- c(ref_cols, umap_col)
    
    # Create reference UMAP
    ref_umap <- input_sobj %>%
      FetchData(c("UMAP_1", "UMAP_2", grp_column, clust_column)) %>%
      as_tibble(rownames = "cell_id") %>%
      mutate(!!sym(clust_column) := if_else(
        !!sym(grp_column) != group, 
        "Other", 
        !!sym(clust_column)
      )) %>%
      create_ref_umap(
        feature     = clust_column,
        plot_cols   = ref_cols,
        feat_levels = names(ref_cols),
        pt_mtplyr   = ref_mtplyr,
        color_guide = color_guide
      )
    
    # Create panel
    marker_fig <- input_sobj %>%
      create_marker_fig(
        input_markers = fig_marks,
        input_GO      = fig_GO,
        clust_column  = clust_column,
        input_umap    = ref_umap,
        umap_color    = umap_col,
        box_colors    = fig_cols,
        group         = group,
        umap_mtplyr   = umap_mtplyr,
        xlsx_name     = xlsx_name,
        sheet_name    = clust,
        ...
      )
    
    cat(nrow(fig_marks), "marker genes were identified.", nrow(fig_GO), "GO terms were identified.")
    print(marker_fig)
    cat("\n\n---\n\n<br>\n\n<br>\n\n")
  }
}


# Load packages
R_packages <- c(
  "tidyverse",  "Seurat",
  "gprofiler2", "knitr",
  "cowplot",    "ggbeeswarm",
  "ggrepel",    "RColorBrewer",
  "xlsx",       "colorblindr",
  "ggforce",    "broom",
  "mixtools",   "clustifyr",
  "boot",       "scales",
  "patchwork",  "ComplexHeatmap",
  "devtools",   "broom",
  "presto",     "here"
)

purrr::walk(R_packages, library, character.only = T)


# ggplot2 themes
theme_info <- theme_cowplot() +
  theme(
    plot.title       = element_text(face = "plain", size = 20),
    strip.background = element_blank(),
    strip.text       = element_text(face = "plain")
  )

umap_theme <- theme_info +
  theme(
    axis.text  = element_blank(),
    axis.ticks = element_blank()
  )

blank_theme <- umap_theme +
  theme(
    axis.line  = element_blank(),
    axis.title = element_blank()
  )

# Legend guides
col_guide <- guide_legend(override.aes = list(size = 3.5, shape = 16))

outline_guide <- guide_legend(override.aes = list(
  size   = 3.5,
  shape  = 21,
  color  = "black",
  stroke = 0.25
))

# Okabe Ito color palettes
ito_cols <- c(
  palette_OkabeIto[1:4], "#d7301f", 
  palette_OkabeIto[5:6], "#6a51a3", 
  palette_OkabeIto[7:8]
)

ito_cols <- ito_cols %>%
  darken(0.4) %>%
  c(ito_cols, ., "#686868", "#000000")

# Set default palette
get_cols <- create_col_fun(ito_cols)

# OVA colors
ova_cols_1 <- c(
  "ova high" = "#475C81",
  "ova low"  = "#B8ECFF",
  "Other"    = "#ffffff"
)

ova_cols_2 <- c(
  "ova low"  = "#B8ECFF",
  "ova high" = "#475C81",
  "Other"    = "#ffffff"
)

ova_cols_3 <- c(
  "Other"    = "#ffffff",
  "ova high" = "#475C81",
  "ova low"  = "#B8ECFF"
)

ova_cols_4 <- c(
  "Other"    = "#ffffff",
  "ova low"  = "#B8ECFF",
  "ova high" = "#475C81"
)

# Feature colors
feat_cols <- c(
  "#D7301F", "#0072B2",
  "#009E73", "#4C3250",
  "#E69F00", "#875C04"
)

# Cell subtype palettes
common_cols <- c(
  "Epithelial" = "#6a51a3",
  "B Cell"     = "#E69F00",
  "T Cell"     = "#009E73",
  "Unassigned" = "#ffffff"
)

so_cols <- so_types %>%
  map(
    set_type_cols,
    sobjs_in   = sobjs,
    type_key   = so_types,
    cols_in    = get_cols(),
    other_cols = common_cols
  )
# Add one to variable
plus_one <- function(x, n = 1) {
  cmd <- str_c(x, " <<- ", x, " + ", n)
  
  eval(parse(text = cmd))
  eval(parse(text = x))
}

# Add arrows to axis
add_arrow_axis <- function(gg_in, fract = 0.1, ...) {
  
  get_line_coords <- function(range_in, fract) {
    mn  <- range_in[1]
    mx  <- range_in[2]
    dif <- mx - mn

    res <- c(mn - (dif * 0.05), mn + (dif * fract))

    res
  }
  
  x_coords <- ggplot_build(gg_in)$layout$panel_scales_x[[1]]$range$range %>%
    get_line_coords(fract = fract)

  y_coords <- ggplot_build(gg_in)$layout$panel_scales_y[[1]]$range$range %>%
    get_line_coords(fract = fract)
   
  res <- gg_in +
    geom_segment(
      x        = x_coords[1],
      xend     = x_coords[2], 
      y        = y_coords[1], 
      yend     = y_coords[1],
      size     = 0.25,
      color    = "black",
      linejoin = "bevel",
      arrow    = arrow(ends = "last", type = "open", length = unit(0.02, "npc")),
      ...
    ) +
    geom_segment(
      y        = y_coords[1], 
      yend     = y_coords[2], 
      x        = x_coords[1], 
      xend     = x_coords[1], 
      size     = 0.25,
      color    = "black",
      linejoin = "bevel",
      arrow    = arrow(ends = "last", type = "open", length = unit(0.02, "npc")),
      ...
    ) +
    theme(axis.title = element_text(hjust = 0, size = 10))
  
  res
}

# Set equal x-axis scales
equalize_x <- function(gg_list_in, log_tran = T, ...) {
  
  set_lims <- function(gg_in, min_x, max_x, log_tran, ...) {
    res <- gg_in +
      coord_cartesian(xlim = c(min_x, max_x))
    
    if (log_tran) {
      res <- res +
        scale_x_log10(labels = trans_format("log10", math_format(10^.x)), ...)
    }
    
    res
  }
  
  gg_ranges <- gg_list_in %>%
    map(~ ggplot_build(.x)$layout$panel_scales_x[[1]]$range$range)
  
  min_val <- gg_ranges %>%
    map_dbl(~ .x[1]) %>%
    min()
  
  max_val <- gg_ranges %>%
    map_dbl(~ .x[2]) %>%
    max()
  
  res <- gg_list_in %>%
    map(
      set_lims,
      min_x    = min_val, 
      max_x    = max_val, 
      log_tran = log_tran,
      ...
    )
  
  res
}

# Create figure 3 panels
create_fig3 <- function(sobj_in, cols_in, subtype_column = "subtype", data_slot = "counts", ova_cols = c("#fafafa", "#d7301f"),
                        box_counts = c("Relative ova signal" = "ova_fc"), umap_counts = c("ova counts" = "adt_ovalbumin"), 
                        plot_title = NULL, arrow_axis = F, pt_size = 0.1, pt_outline = 0.4, pt_size_2 = 0.3, pt_outline_2 = 0.5,
                        umap_cell_count = F, box_cell_count = T, control_types = c("B Cell", "T Cell"), ...) {
  
  box_column <- umap_column <- "cell_type"
  box_cols <- umap_cols <- cols_in
  
  # Fetch plotting data
  data_df <- sobj_in %>%
    FetchData(c(subtype_column, box_counts, umap_counts, "UMAP_1", "UMAP_2"), slot = data_slot) %>%
    as_tibble(rownames = "cell_id")
  
  if (!is.null(names(box_counts))) {
    data_df <- data_df %>%
      rename(!!box_counts)
    
    box_counts <- names(box_counts)
  }
  
  if (!is.null(names(umap_counts))) {
    data_df <- data_df %>%
      rename(!!umap_counts)
    
    umap_counts <- names(umap_counts)
  }
  
  # Set subtype order
  # Move select cell types to front of order
  data_df <- data_df %>%
    mutate(
      cell_type = !!sym(subtype_column),
      cell_type = fct_reorder(cell_type, !!sym(box_counts), median)
    )
  
  type_order <- levels(data_df$cell_type)
  
  if (!is.null(control_types)) {
    control_types <- control_types[control_types %in% type_order]
    type_order <- type_order[!type_order %in% control_types]
    type_order <- c(control_types, type_order)
  }
  
  # Count cells for each subtype
  data_df <- data_df %>%
    group_by(cell_type) %>%
    mutate(cell_count = n_distinct(cell_id)) %>%
    ungroup() %>%
    mutate(cell_type = fct_relevel(cell_type, type_order)) %>%
    arrange(cell_type) %>%
    mutate(
      cell_count = str_c(cell_type, "\n(n = ", cell_count, ")"),
      cell_count = fct_inorder(cell_count)
    )
  
  # Set cell type colors
  names(type_order) <- levels(data_df$cell_count)
  
  cols_df <- tibble(
    cell_type = type_order,
    cell_count = names(type_order)
  )
  
  cols_df <- cols_df %>%
    mutate(color = cols_in[cell_type])
  
  # Subtype UMAP
  if (umap_cell_count) {
    umap_column <- "cell_count"
    umap_cols <- setNames(cols_df$color, cols_df$cell_count)
  }
  
  umap <- data_df %>%
    plot_features(
      feature     = umap_column,
      pt_size     = pt_size,
      pt_outline  = pt_outline,
      plot_cols   = umap_cols,
      feat_levels = rev(names(umap_cols))
    ) +
    guides(color = guide_legend(override.aes = list(size = 3.5))) +
    ggtitle(plot_title) +
    blank_theme +
    theme(
      plot.title = element_text(size = 12),
      legend.position = "none"
    ) +
    theme(...)
  
  if (arrow_axis) {
    umap <- add_arrow_axis(umap)
  }
  
  # OVA UMAP
  if (!is.null(umap_counts)) {
    ova_umap <- data_df %>%
      plot_features(
        feature    = umap_counts,
        plot_cols  = ova_cols,
        pt_size    = pt_size_2,
        pt_outline = pt_outline_2,
        min_pct    = 0.01,
        max_pct    = 0.99
      ) +
      blank_theme +
      theme(
        plot.title        = element_text(size = 10, hjust = 0.5),
        legend.position   = "right",
        legend.key.width  = unit(0.15, "cm"),
        legend.key.height = unit(0.30, "cm"),
        legend.title      = element_text(size = 10),
        legend.text       = element_text(size = 10)
      )
    
    if (arrow_axis) {
      ova_umap <- add_arrow_axis(ova_umap)
    }
  }
  
  # OVA boxes
  if (box_cell_count) {
    box_column <- "cell_count"
    box_cols <- setNames(cols_df$color, cols_df$cell_count)
  }
  
  boxes <- data_df %>%
    ggplot(aes(!!sym(box_counts), !!sym(box_column), fill = !!sym(box_column))) +
    geom_violin(size = 0.3, draw_quantiles = c(0.25, 0.75), alpha = 0.75) +
    stat_summary(geom = "point", color = "black", fun = median) +
    scale_color_manual(values = box_cols) +
    scale_fill_manual(values = box_cols) +
    theme_minimal_vgrid() +
    theme(
      legend.position    = "none",
      axis.title.y       = element_blank(),
      axis.title         = element_text(size = 10),
      axis.text          = element_text(size = 10),
      axis.ticks.x       = element_line(size = 0.1),
      panel.grid.major.x = element_line(size = 0.1)
    )
  
  res <- list(umap, boxes)
  
  if (!is.null(umap_counts)) {
    res <- append(res, list(ova_umap))
  }
  
  names(res) <- rep(plot_title, length(res))
  
  res
}

# Create figure 4 panels
create_fig4 <- function(sobj_in, ova_cols, feats, feat_cols, ref_cols, pt_size = 0.00001, pt_outline = 0.4, gmm_filt = NULL,
                        low_col = c("white", "white"), sep_bar_labs = F, plot_boxes = T, median_pt = 1) {

  # Theme elements
  legd_guide <- guide_legend(override.aes = list(
    size   = 3.5,
    shape  = 21,
    color  = "black",
    stroke = 0.25
  ))
  
  text_theme <- theme(
    axis.title  = element_text(size = 10),
    legend.text = element_text(size = 8),
    axis.text   = element_text(size = 8)
  )
  
  # Run GMM
  if (!is.null(gmm_filt)) {
    sobj_in <- sobj_in %>%
      classify_ova(
        filt_column = names(gmm_filt), 
        filt        = gmm_filt,
        quiet       = T
      )
  }

  # Data for OVA group UMAP
  ova_order <- names(ova_cols)
  
  data_df <- sobj_in@meta.data %>%
    as_tibble(rownames = "cell_id") %>%
    group_by(GMM_grp) %>%
    mutate(cell_count = n_distinct(cell_id)) %>%
    ungroup() %>%
    mutate(GMM_grp = fct_relevel(GMM_grp, ova_order)) %>%
    arrange(GMM_grp) %>%
    mutate(
      cell_count = str_c(GMM_grp, "\n(n = ", cell_count, ")"),
      cell_count = fct_inorder(cell_count)
    )
  
  # Set OVA group colors
  names(ova_order) <- levels(data_df$cell_count)
  
  cols_df <- tibble(
    grp        = ova_order,
    cell_count = names(ova_order)
  )
  
  cols_df <- cols_df %>%
    mutate(color = ova_cols[grp])
  
  umap_cols <- set_names(
    x  = cols_df$color,
    nm = cols_df$cell_count
  )

  ova_guide <- legd_guide
  ova_guide$reverse <- T
  
  # Create OVA group UMAP
  ova_grp_umap <- data_df %>%
    plot_features(
      feature     = "cell_count",
      pt_size     = pt_size,
      pt_outline  = pt_outline,
      plot_cols   = umap_cols,
      feat_levels = names(ova_order)
    ) +
    guides(color = ova_guide, fill = ova_guide) +
    text_theme +
    blank_theme +
    theme(
      plot.margin     = unit(c(0.2, 1, 0.2, 1.5), "cm"),
      legend.position = "top",
      legend.title    = element_blank(),
      legend.text     = element_text(size = 8)
    )
  
  # OVA hist
  ova_hist <- sobj_in@meta.data %>%
    filter(GMM_grp != "Other") %>%
    mutate(GMM_grp = fct_relevel(GMM_grp, c("ova low", "ova high"))) %>%
    ggplot(aes(adt_ovalbumin, after_stat(density), fill = GMM_grp)) +
    
    stat_density(geom = "point", position = "identity", size = 0, color = "white") +
    geom_density(fill = "white", color = "white", size = 0.3, alpha = 0.9) +
    geom_density(size = 0.3, alpha = 0.8, show.legend = F) +
  
    geom_vline(aes(xintercept = mu), size = 0.5, linetype = 2, color = "grey35") +
    coord_cartesian(ylim = c(0, 1.7)) +
    scale_fill_manual(values = ova_cols) +
    scale_x_log10(labels = trans_format("log10", math_format(10^.x))) +
    labs(x = "ova counts", y = "Density") +
    guides(fill = guide_legend(override.aes = list(shape = 22, size = 3.5, stroke = 0.25, color = "black"))) +
    theme_minimal_hgrid() +
    text_theme +
    theme(
      plot.margin        = unit(c(0.2, 0.5, 0.2, 0.2), "cm"),
      legend.position    = c(0.05, 0.92),
      legend.key.height  = unit(0.15, "cm"),
      legend.title       = element_blank(),
      axis.line.y        = element_line(size = 0.5, color = "grey85"),
      axis.ticks.y       = element_line(size = 0.1),
      panel.grid.major.y = element_line(size = 0.1)
    )
  
  # OVA subtype bar graphs
  type_bar <- sobj_in@meta.data %>%
    as_tibble(rownames = "cell_id") %>%
    mutate(
      subtype = fct_reorder(subtype, cell_id, n_distinct),
      GMM_grp = fct_relevel(GMM_grp, c("ova low", "ova high", "Other"))
    ) %>%
    ggplot(aes(GMM_grp, fill = subtype)) +
    
    stat_count(position = "fill", geom = "point", size = 0, color = "white") +
    geom_bar(position = "fill", size = 0.25, color = "black", show.legend = F) +
    
    scale_fill_manual(values = ref_cols) +
    scale_y_continuous(breaks = c(0, 0.5, 1)) +
    guides(fill = guide_legend(ncol = 1, override.aes = list(shape = 22, size = 3.5, stroke = 0.25, color = "black"))) +
    labs(y = "Fraction of cells") +
    theme_minimal_hgrid() +
    text_theme +
    theme(
      legend.title       = element_blank(),
      legend.key.height  = unit(0.35, "cm"),
      axis.title.x       = element_blank(),
      axis.line.y        = element_line(size = 0.5, color = "grey85"),
      axis.ticks.y       = element_line(size = 0.1),
      panel.grid.major.y = element_blank()
    )
  
  if (sep_bar_labs) {
    type_bar <- type_bar +
      theme(axis.text.x = element_text(hjust = c(0.9, 0.5, 0.1)))
  }
  
  # Reference UMAP
  ref_umap <- sobj_in %>%
    create_ref_umap(
      feature     = "subtype",
      color_guide = legd_guide,
      plot_cols   = ref_cols,
      pt_mtplyr   = pt_size / 0.1,
      pt_outline  = pt_outline
    ) +
    theme(
      plot.margin     = unit(c(0.2, 1.5, 0.2, 0.2), "cm"),
      legend.position = "left",
      legend.margin   = margin(0.2, 0.2, 0.2, 1.5, "cm"),
      legend.text     = element_text(size = 8)
    )
  
  # Create list of feature UMAPs
  feat_umaps <- sobj_in %>%
    create_marker_umaps(
      pt_mtplyr     = pt_size / 0.1,
      add_outline   = pt_outline,
      input_markers = feat_cols,
      low_col       = low_col
    ) %>%
    map(~ {
      .x +
        guides(color = guide_colorbar(frame.colour = "black", frame.linewidth = 0.2)) +
        theme(plot.title = element_text(size = 12))
    })
  
  # Top panel of feature UMAPs
  top_umaps <- append(list(ref_umap), feat_umaps[1:2]) %>%
    plot_grid(
      plotlist   = .,
      rel_widths = c(1, 0.5, 0.5),
      nrow       = 1
    )
  
  # Bottom panel of feature UMAPs
  bot_umaps <- feat_umaps[3:6] %>%
    plot_grid(
      plotlist = .,
      nrow     = 1,
      align    = "h",
      axis     = "tb"
    )
  
  # Final feature UMAP figure
  feat_umaps <- plot_grid(
    top_umaps, bot_umaps,
    ncol  = 1,
    align = "vh",
    axis  = "trbl"
  ) + 
    theme(plot.margin = unit(c(1, 0.2, 1.5, 0.2), "cm"))
  
  # Feature violin plots
  box_data <- sobj_in %>%
    FetchData(c(feats, "subtype", "GMM_grp")) %>%
    as_tibble(rownames = "cell_id") %>%
    pivot_longer(cols = c(-cell_id, -subtype, -GMM_grp)) %>%
    mutate(type_name = str_c(subtype, "_", name)) %>%
    group_by(type_name) %>%
    mutate(up_qt = boxplot.stats(value)$stats[4]) %>%
    ungroup() %>%
    mutate(type_name = fct_reorder(type_name, up_qt, median, .desc = T)) %>%
    # mutate(
    #   type_name = str_c(subtype, "_", name),
    #   type_name = fct_reorder(type_name, value, median, .desc = T)
    # ) %>%
    group_by(subtype, name) %>%
    mutate(med = median(value)) %>%
    ungroup() %>%
    mutate(name = fct_reorder(name, med, max, .desc = T))
    
  feat_boxes <- box_data %>%
    ggplot(aes(type_name, value, fill = subtype)) +
    facet_wrap(~ name, nrow = 1, scales = "free_x") +
    scale_fill_manual(values = ref_cols) +
    scale_color_manual(values = ref_cols) +
    labs(y = "Counts") +
    theme_minimal_hgrid() +
    text_theme +
    theme(
      strip.text         = element_text(size = 12),
      legend.title       = element_blank(),
      legend.key.height  = unit(0.35, "cm"),
      axis.title.x       = element_blank(),
      axis.text.x        = element_blank(),
      axis.line.x        = element_blank(),
      axis.ticks.x       = element_blank(),
      axis.line.y        = element_line(size = 0.5, color = "grey85"),
      panel.grid.major.y = element_blank(),
      panel.background   = element_rect(fill = "#fafafa")
    )
  
  if (plot_boxes) {
    feat_boxes <- feat_boxes +
      stat_summary(geom = "point", shape = 22, fun = median, size = 0) +
      stat_summary(geom = "point", shape = 22, fun = median, size = median_pt + 1, color = "black") +
      geom_boxplot(
        color          = "white",
        fill           = "white",
        alpha          = 1,
        size           = 0.3,
        width          = 0.6,
        outlier.colour = "white",
        outlier.alpha  = 1,
        outlier.size   = 0.1,
        coef           = 0
      ) +
      geom_boxplot(
        size           = 0.3,
        width          = 0.6,
        outlier.colour = "grey85",
        outlier.alpha  = 1,
        outlier.size   = 0.1,
        show.legend    = F,
        coef           = 0,
        fatten = 0
      ) +
      stat_summary(
        aes(color = subtype),
        geom        = "point",
        shape       = 22,
        fun         = median,
        size        = median_pt,
        stroke      = 1,
        fill        = "white",
        show.legend = F
      ) +
      guides(fill = guide_legend(override.aes = list(size = 3.5, stroke = 0.25)))
    
  } else {
    feat_boxes <- feat_boxes +
      geom_violin(size = 0.3, scale = "width") +
      stat_summary(geom = "point", color = "black", alpha = 1, fun = median) +
      theme(panel.spacing = unit(0.5, "cm"))
  }
  
  # Final top panel
  top_lets <- letters
  bot_lets <- c("", "d", "e")
  
  if (!is.null(gmm_filt)) {
    type_bar <- ggplot() +
      theme_void()
    
    top_lets <- c(letters[1:2], "")
    bot_lets <- c("", "c", "d")
  }
  
  top <- plot_grid(
    ova_grp_umap, ova_hist, type_bar,
    rel_widths     = c(0.86, 1, 0.9),
    labels         = top_lets,
    label_fontface = "plain",
    label_size     = 18,
    align          = "h",
    axis           = "tb",
    nrow           = 1
  )
  
  # Final bottom panel
  blank_gg <- ggplot() +
    theme_void()
  
  bot <- plot_grid(
    feat_boxes, blank_gg,
    rel_widths = c(1, if_else(plot_boxes, 0.45, 0.2))
  )
  
  # Create final figure
  res <- plot_grid(
    top, feat_umaps, feat_boxes,
    rel_heights    = c(0.55, 1.05, 0.35),
    ncol           = 1,
    labels         = bot_lets,
    label_fontface = "plain",
    label_size     = 18
  )
  
  res
}

# Plot correlation
plot_corr <- function(sobj_in, x, y, feat, data_slot, cols_in, plot_title, ...) {
  
  res <- sobj_in %>%
    FetchData(c(x, y, feat), slot = data_slot) %>%
    mutate(
      !!sym(x) := log10(!!sym(x)),
      !!sym(y) := log10(!!sym(y))
    ) %>%
    filter(
      !!sym(x) != -Inf,
      !!sym(y) != -Inf
    ) %>%
    plot_features(
      x         = x,
      y         = y,
      feature   = feat,
      data_slot = "counts",
      plot_cols = cols_in,
      lab_pos   = c(0.9, 1),
      lab_size  = 5,
      calc_cor  = T,
      lm_line   = T,
      ...
    ) +
    ggtitle(plot_title) +
    guides(color = guide_legend(override.aes = list(size = 3.5))) +
    theme_info +
    theme(legend.title = element_blank())
  
  res
}

# Calculate pairwise p-values for gg objects
calc_p_vals <- function(gg_in, sample_name, data_column, type_column, log_tran = T) {
  
  # Pull data from gg object
  gg_data <- gg_in$data
  
  # Log transform
  if (log_tran) {
    gg_data <- gg_data %>%
      mutate(!!sym(data_column) := log10(!!sym(data_column)))
  }
  
  # Calculate median
  gg_stats <- gg_data %>%
    group_by(!!sym(type_column)) %>%
    summarize(med = median(!!sym(data_column)))
  
  # Run wilcox test
  gg_counts <- gg_data %>%
    pull(data_column)
  
  gg_groups <- gg_data %>%
    pull(type_column)
  
  res <- gg_counts %>%
    pairwise.wilcox.test(
      g = gg_groups, 
      p.adj = "bonf"
    ) %>%
    tidy()
  
  # Add medians to data.frame
  res <- gg_stats %>%
    rename(med_1 = med) %>%
    right_join(res, by = c("cell_type" = "group1"))
  
  res <- gg_stats %>%
    rename(med_2 = med) %>%
    right_join(res, by = c("cell_type" = "group2"))
  
  # Format final table
  res <- res %>%
    mutate(Sample = sample_name) %>%
    select(
      Sample,
      `Cell type 1`             = str_c(type_column, ".y"),
      `Median OVA FC 1 (log10)` = med_1,
      `Cell type 2`             = type_column,
      `Median OVA FC 2 (log10)` = med_2,
      p.value
    )
  
  res
}


# Parameter lists
fig3_names <- c("d2_DC", "d14_DC", "d14_LEC", "d14_FRC")
fig3_sobjs <- sobjs[fig3_names]

fig3_params <- list(
  sobj_in    = fig3_sobjs, 
  plot_title = so_titles[names(fig3_sobjs)],
  cols_in    = so_cols[names(fig3_sobjs)]
)

corr_params <- list(
  sobj_in    = sobjs, 
  plot_title = so_titles[names(sobjs)], 
  cols_in    = so_cols[names(sobjs)]
)

n_plot <- 0

Figure 4

LEC markers associated with high antigen counts. (a) LECs containing low and high antigen counts were identified using a gaussian mixture model. A UMAP projection is shown for ova-low and ova-high cells. T cells, B cells, and epithelial cells are shown in white (Other). (b) The distribution of ova antigen counts is shown for ova-low and ova-high cells. Dotted lines indicate the mean counts for each population. (c) The fraction of cells belonging to each LEC subtype is shown for ova-low and ova-high populations. (d) Gene expression counts were plotted for select genes associated with ova-high LECs. (e) The expression of ova-high LEC markers is shown for each LEC subtype.

v1




v2




v3




v4




v5




LEC clustering

Clustering for LEC subtype assignment using Xiang et al. reference

# Load reference
load(here(params$sobj_refs$LEC))

so_in <- sobjs[["d14_LEC"]]

# Get subtypes for different resolutions/thresholds
reslns <- c(0.6, 1, 1.4, 1.8, 2.2)
r <- seq(0.5, 0.8, 0.1)

res_df <- expand.grid(reslns, r) %>%
  rename(resln = Var1, r_thresh = Var2) %>%
  as_tibble()

plot_df <- map2(res_df$resln, res_df$r_thresh, ~ {
  res <- so_in %>%
    FindClusters(
      resolution = .x,
      verbose    = F
    ) %>%
    clustify(
      ref_mat     = ref_LEC_xiang,
      cluster_col = "seurat_clusters",
      threshold   = .y
    )
  
  res <- res@meta.data %>%
    as_tibble(rownames = "cell_id") %>%
    mutate(
      subtype  = if_else(cell_type == "LEC", type.clustify, subtype),
      subtype  = str_to_title_v2(subtype),
      UMAP_1   = UMAP_1.x,
      UMAP_2   = UMAP_2.x,
      n_clust  = str_c(n_distinct(seurat_clusters), " (", .x, ")"),
      r        = r.clustify,
      r_thresh = .y
    ) %>%
    select(
      cell_id, orig.ident, seurat_clusters, 
      cell_type, subtype, n_clust,
      r, r_thresh, UMAP_1, UMAP_2
    )
  
  res
}) %>%
  bind_rows()

# Subtype UMAPs
subtype_gg <- plot_df %>%
  plot_features(
    feature    = "subtype",
    pt_outline = 0.5,
    pt_size    = 0.05,
    plot_cols  = so_cols[["d14_LEC"]],
    split_id   = c("n_clust", "r_thresh"), 
    switch     = "y"
  ) +
  blank_theme +
  labs(subtitle = "Clustifyr Threshold", y = "Number of Clusters") +
  guides(color = outline_guide) +
  blank_theme +
  theme(
    plot.subtitle = element_text(hjust = 0.5),
    panel.border  = element_rect(fill = NA, color = "black", size = 0.2),
    legend.title  = element_blank(),
    axis.title.y  = element_text()
  )

# Reference UMAPs
clust_guide <- outline_guide
clust_guide$ncol <- 1

ref_gg <- plot_df %>%
  mutate(r_thresh = "Clusters") %>%
  unique() %>%
  plot_features(
    feature    = "seurat_clusters",
    pt_outline = 0.5,
    pt_size    = 0.05,
    split_id   = c("n_clust", "r_thresh"), 
    switch     = "y"
  ) +
  blank_theme +
  labs(y = "Number of Clusters") +
  guides(fill = clust_guide) +
  blank_theme +
  theme(
    plot.subtitle = element_text(hjust = 0.5),
    panel.border  = element_rect(fill = NA, color = "black", size = 0.2),
    legend.title  = element_blank(),
    axis.title.y  = element_text()
  )

# Correlation UMAPs
cor_df <- plot_df %>%
  mutate(r_thresh = "Correlation") %>%
  unique()

cor_gg <- cor_df %>%
  ggplot(aes(UMAP_1, UMAP_2, color = r))

walk(unique(cor_df$seurat_clusters), ~ {
  pt_1 <- cor_df %>%
    filter(
      seurat_clusters == .x,
      r <= 0.6
    )
  
  pt_2 <- cor_df %>%
    filter(
      seurat_clusters == .x,
      r > 0.6
    )
  
  cor_gg <<- cor_gg +
    geom_point(data = pt_1, color = "black", size = 0.5) +
    geom_point(data = pt_1, color = "white", size = 0.05) +
    geom_point(data = pt_2, color = "black", size = 0.5) +
    geom_point(data = pt_2, size = 0.05)
})

cor_gg <- cor_gg +
  facet_grid(n_clust ~ r_thresh, switch = "y") +
  labs(y = "Number of Clusters") +
  blank_theme +
  theme(
    panel.border = element_rect(fill = NA, color = "black", size = 0.2),
    legend.title = element_blank(),
    axis.title.y = element_text()
  )

# Create final figure
plot_grid(
  ref_gg, cor_gg, subtype_gg,
  rel_widths = c(0.4, 0.4, 1),
  nrow  = 1,
  align = "vh",
  axis  = "tbrl"
)